clear all

* Replication file for the figures in
* "The Long-Run Educational Benefits of High-Achieving Classrooms"

* Combine the 2015, 2016 and 2017 cohorts into one dataset.
* year is set for the first file; after each append, rows whose year is still
* missing are tagged with the cohort just appended (this assumes the appended
* files do not carry their own year variable -- TODO confirm).
* NOTE(review): -append, force- lets string/numeric type mismatches through
* instead of stopping; confirm the three files share the same variable types.
use "C:\Users\X\2015data.dta"
gen year=2015
append using "C:\Users\X\2016data.dta", force
replace year =2016 if year==.
append using "C:\Users\X\2017data.dta", force
replace year =2017 if year==.

* Drop unidentified IDs (coded 99999).
drop if unique_ID==99999

* Generate a cohort-specific ID so IDs do not overlap across the cohort files.
* Stored as double: the default float type holds integers exactly only up to
* 16,777,216, so larger products would be rounded and distinct students could
* end up sharing one newID in the later -bys newID- steps.
* NOTE(review): the product itself is not guaranteed unique across cohorts
* (ID 2016 in cohort 2015 and ID 2015 in cohort 2016 both give 4062240).
* The encoding is kept because later code refers to specific newID values.
gen double newID = unique_ID*year

* Running variable for the RD: Total_mark on the Exam==1 record, spread to
* every row of the same student through the within-newID mean.
gen running = Total_mark if Exam==1
bysort newID: egen running1 = mean(running)

* Normalize by the cohort cutoff (2015: 480, 2016: 467, 2017: 425).
* Rows from any other year get a missing normalized score.
gen normrunning = running1 - cond(year==2015, 480, cond(year==2016, 467, cond(year==2017, 425, .)))

* The 2017 top-classroom variable has a coding error (Top_class==2): recode to 0.
replace Top_class = 0 if year==2017 & Top_class==2

* Numeric gender variable built from Gender; with -force-, entries that are
* not numeric become missing in sex.
destring Gender, generate(sex) force

* One indicator variable per cohort year (years1, years2, ...).
tabulate year, generate(years)


* Bins for the RD graphs: 5-point-wide intervals [lo, lo+5) for lo = -75, -70,
* ..., 75, each labelled by its midpoint (lo + 2.5). Observations outside
* [-75, 80) or with a missing normalized score keep bin missing.
gen bin = .

quietly forvalues lo = -75(5)75 {
	replace bin = `lo' + 2.5 if normrunning >= `lo' & normrunning < `lo' + 5
}


* High school entrance score in the main subjects (Chinese, Math, English),
* used as a control. It is read from the Exam==0 record.
gen hsscore1 = Chinese + Math + English if Exam==0
* When the three-subject sum is missing, fall back to the total mark.
replace hsscore1 = Total_mark if hsscore1==. & Exam==0
* Spread the score to every row of the student.
bysort newID: egen hsscore = mean(hsscore1)

* First-stage class assignment is taken from the Exam==1 record (first year
* of high school), since a student can be in a different section by third year.
gen Top_classsec = Top_class if Exam==1
bysort newID: egen Top_class1 = mean(Top_classsec)

* Regression variables.
*   treatment: 1 at or above the cutoff, 0 below it, missing when the
*              normalized score is missing.
*   slope:     normalized score interacted with treatment.
gen treatment = (normrunning >= 0) if normrunning != .
gen slope = treatment * normrunning

* Urban versus rural variable: replace the row-level urban value by its
* within-student mean, and set it to 0 for students with no urban value.
bysort newID: egen urban2 = mean(urban)
drop urban
rename urban2 urban
replace urban = 0 if urban == .

* -exit- stops the do-file here: when the file is run as a whole, none of the
* figure code below this line is executed.
* NOTE(review): presumably the figure sections are meant to be run
* selectively after the data preparation -- confirm.
exit




		*****************************************************************************************************************************************
		***************************************************Figure 1----First Stage***************************************************************
		*****************************************************************************************************************************************
		
		* Bin-level share of students in the top class, computed on Exam==1 rows.
		bysort bin: egen meanTC = mean(Top_class) if Exam==1

		* Binned scatter plus separate linear fits on each side of the cutoff,
		* restricted to normalized scores between -75 and 75.
		twoway ///
			(scatter meanTC bin if Exam==1 & inrange(normrunning, -75, 75), xlabel(-75(25)75)) ///
			(lfit Top_class normrunning if Exam==1 & normrunning>=-75 & normrunning<0, clcolor(black)) ///
			(lfit Top_class normrunning if Exam==1 & inrange(normrunning, 0, 75), clcolor(black)), ///
			xline(0) ///
			xtitle("Normalized Score in Classroom Placement Exam") ///
			ytitle("Likelihood of Being in High-Achieving Class") ///
			legend(off) graphregion(fcolor(white) lcolor(white))

		graph export Firststage.png, replace
  
  
  	

  
  	  *****************************************************************************************************************************************
		***************************************************Figure 2----1st year High School Grades ***************************************************************
		*****************************************************************************************************************************************
  
  
		*A) Generate standardized Mathematics scores by year

	* Student-level mean Math mark over the exams coded 12 to 17.
	bysort newID: egen firstyearMath = mean(Math) if inlist(Exam, 12, 13, 14, 15, 16, 17)

	* Standardize within cohort: stmat1, stmat2, stmat3 hold the z-scores for
	* 2015, 2016 and 2017 (each is missing outside its own cohort).
	local k = 0
	forvalues y = 2015/2017 {
		local ++k
		egen stmat`k' = std(firstyearMath) if year==`y'
	}

	* Stack the cohort-specific z-scores into a single variable.
	gen stmath = cond(year==2016, stmat2, cond(year==2017, stmat3, stmat1))

	* Bin means, computed on the Exam==12 rows only.
	bysort bin: egen meanstmath = mean(stmath) if Exam==12

	* ******** FIGURE 2A ********
	twoway ///
		(scatter meanstmath bin if Exam==12 & inrange(normrunning, -75, 75), xlabel(-75(25)75)) ///
		(lfit stmath normrunning if Exam==12 & normrunning>=-75 & normrunning<0, clcolor(black)) ///
		(lfit stmath normrunning if Exam==12 & inrange(normrunning, 0, 75), clcolor(black)), ///
		xline(0) ///
		xtitle("Normalized Score in Classroom Placement Exam") ///
		ytitle("1st year Math Grades") ///
		legend(off) graphregion(fcolor(white) lcolor(white))

	graph export Figure5a.png, replace
  
  
  
  * B) Standardized Chinese scores by cohort year.

  * Student-level mean Chinese mark over the exams coded 12 to 17.
  bysort newID: egen firstyearChinese = mean(Chinese) if inlist(Exam, 12, 13, 14, 15, 16, 17)

  * Standardize within cohort: stchi1, stchi2, stchi3 hold the z-scores for
  * 2015, 2016 and 2017 (each is missing outside its own cohort).
  local k = 0
  forvalues y = 2015/2017 {
    local ++k
    egen stchi`k' = std(firstyearChinese) if year==`y'
  }

  * Stack the cohort-specific z-scores into a single variable.
  gen stChinese = cond(year==2016, stchi2, cond(year==2017, stchi3, stchi1))

  * Bin means, computed on the Exam==12 rows only.
  bysort bin: egen meanstChinese = mean(stChinese) if Exam==12

  * ******** FIGURE 2B ********
  twoway ///
    (scatter meanstChinese bin if Exam==12 & inrange(normrunning, -75, 75), xlabel(-75(25)75)) ///
    (lfit stChinese normrunning if Exam==12 & normrunning>=-75 & normrunning<0, clcolor(black)) ///
    (lfit stChinese normrunning if Exam==12 & inrange(normrunning, 0, 75), clcolor(black)), ///
    xline(0) ///
    xtitle("Normalized Score in Classroom Placement Exam") ///
    ytitle("1st year Chinese Grades") ///
    legend(off) graphregion(fcolor(white) lcolor(white))

  graph export Figure5b.png, replace
  
  
  	*C) Generate standardized English scores by year

			
	* Student-level mean English mark over the exams coded 12 to 17.
	bysort newID: egen firstyearEnglish = mean(English) if inlist(Exam, 12, 13, 14, 15, 16, 17)

	* Standardize within cohort: steng1, steng2, steng3 hold the z-scores for
	* 2015, 2016 and 2017 (each is missing outside its own cohort).
	local k = 0
	forvalues y = 2015/2017 {
		local ++k
		egen steng`k' = std(firstyearEnglish) if year==`y'
	}

	* Stack the cohort-specific z-scores into a single variable.
	gen stenglish = cond(year==2016, steng2, cond(year==2017, steng3, steng1))

	* Bin means, computed on the Exam==12 rows only.
	bysort bin: egen meanstenglish = mean(stenglish) if Exam==12

	* ******** FIGURE 2C ********
	twoway ///
		(scatter meanstenglish bin if Exam==12 & inrange(normrunning, -75, 75), xlabel(-75(25)75)) ///
		(lfit stenglish normrunning if Exam==12 & normrunning>=-75 & normrunning<0, clcolor(black)) ///
		(lfit stenglish normrunning if Exam==12 & inrange(normrunning, 0, 75), clcolor(black)), ///
		xline(0) ///
		xtitle("Normalized Score in Classroom Placement Exam") ///
		ytitle("1st year English Grades") ///
		legend(off) graphregion(fcolor(white) lcolor(white))

	graph export Figure5c.png, replace
  

		
		
		
  
							*****************************************************************************************************************************************
							***************************************************Figure 3---Long run outcomes *********************************************************
							*****************************************************************************************************************************************
  
  
  
				*A) Standardized GAOKAO EXAM RESULTS
  
  * Standardize the Exam==99 total mark within cohort year and track
  * (Division==1 is the science track, Division==0 the arts track).

  * Science track: stgaok1sci, stgaok2sci, stgaok3sci for 2015, 2016, 2017.
  local k = 0
  forvalues y = 2015/2017 {
    local ++k
    egen stgaok`k'sci = std(Total_mark) if Exam==99 & year==`y' & Division==1
  }

  * Start the combined score from the 2015 science values, then fill in the
  * other science cohorts.
  gen stgaok = stgaok1sci
  replace stgaok = stgaok2sci if Division==1 & year==2016
  replace stgaok = stgaok3sci if Division==1 & year==2017

  * Arts track: stgaok1art, stgaok2art, stgaok3art, each copied into the
  * combined score for its own cohort.
  local k = 0
  forvalues y = 2015/2017 {
    local ++k
    egen stgaok`k'art = std(Total_mark) if Exam==99 & year==`y' & Division==0
    replace stgaok = stgaok`k'art if Division==0 & year==`y'
  }

  * Bin means, computed on the Exam==99 rows only.
  bysort bin: egen meanstgaok_mark = mean(stgaok) if Exam==99

  * ******** FIGURE 3A ********
  twoway ///
    (scatter meanstgaok_mark bin if Exam==99 & inrange(normrunning, -75, 75), xlabel(-75(25)75)) ///
    (lfit stgaok normrunning if Exam==99 & normrunning>=-75 & normrunning<0, clcolor(black)) ///
    (lfit stgaok normrunning if Exam==99 & inrange(normrunning, 0, 75), clcolor(black)), ///
    xline(0) ///
    xtitle("Normalized Score in Classroom Placement Exam") ///
    ytitle("Standardized College Entrance Exam Score") ///
    legend(off) graphregion(fcolor(white) lcolor(white))

  graph export Figure8a.png, replace

		*B) Likelihood of going to any Chinese college
		
	* College-going indicator, defined on the Exam==99 rows:
	*   1 when the student has a non-missing if_FirstBatch on at least one row,
	*   0 when if_FirstBatch is missing on every row of the student.
	bys newID: egen maxcoll1 = mean(if_FirstBatch)
	gen College1 = 1 if Exam==99
	* Use the full variable name: an abbreviation stops working under
	* -set varabbrev off- or as soon as another maxcoll* variable exists.
	replace College1 = 0 if Exam==99 & maxcoll1==.

	* Bin means, computed on the Exam==99 rows only.
	bys bin: egen meancollege1 = mean(College1) if Exam==99

	* ******** FIGURE 3B ********
	* (a stray second comma before the scatter options has been removed)
	twoway ///
		(scatter meancollege1 bin if normrunning>=-75 & normrunning<=75 & Exam==99, xlabel(-75 (25) 75) ylabel(0.75(0.1)1)) ///
		(lfit College1 normrunning if normrunning<0 & normrunning>=-75 & Exam==99, clcolor(black)) ///
		(lfit College1 normrunning if normrunning>=0 & normrunning<=75 & Exam==99, clcolor(black)), ///
		xline(0) ///
		xtitle("Normalized Score in Classroom Placement Exam") ///
		ytitle("Enroll in Any Chinese college") ///
		legend(off) graphregion(fcolor(white) lcolor(white))

	graph export Figure8aa.png, replace

		
	
	     *C) Likelihood of attending First tier University (widest definition)
		 
		 *Generate variable
* First-tier admission indicator.
* firsttieradmit is not created anywhere in this file. If the data do not
* already carry it, build it from if_FirstBatch so the -replace- below cannot
* stop with "variable not found". An existing variable is left untouched.
* NOTE(review): presumably firsttieradmit ships with the cohort files -- confirm.
capture confirm variable firsttieradmit
if _rc {
	gen firsttieradmit = 1 if if_FirstBatch==1
}
* Anyone without if_FirstBatch==1 (including missing) counts as not admitted.
replace firsttieradmit = 0 if if_FirstBatch!= 1

* Bin means, computed on the Exam==99 rows only.
bys bin: egen meanfirsttier = mean(firsttieradmit) if Exam==99

* ******** FIGURE 3C ********
twoway ///
	(scatter meanfirsttier bin if normrunning>=-75 & normrunning<=75 & Exam==99, xlabel(-75 (25) 75)) ///
	(lfit firsttieradmit normrunning if normrunning<0 & normrunning>=-75 & Exam==99, clcolor(black)) ///
	(lfit firsttieradmit normrunning if normrunning>=0 & normrunning<=75 & Exam==99, clcolor(black)), ///
	xline(0) ///
	xtitle("Normalized Score in Classroom Placement Exam") ///
	ytitle("Enroll in a First-tier University") ///
	legend(off) graphregion(fcolor(white) lcolor(white))

graph export Figure8b.png, replace
	  
	     *D) Likelihood of attending Top 100 University
	
* Fix errors in the 211 flag: these three students are recorded as top-100
* without having entered the first tier, which should not be possible.
* Their if_211 is reset to 0 on the Exam==99 row.
replace if_211 = 0 if Exam==99 & inlist(newID, 1100736, 62496, 429408)

* Top-100 indicator: 1 when if_211==1, 0 otherwise (missing if_211 counts as 0).
gen top100admit = (if_211 == 1)

* Bin means, computed on the Exam==99 rows only.
bysort bin: egen meantop100admit = mean(top100admit) if Exam==99

* ******** FIGURE 3D ********
twoway ///
	(scatter meantop100admit bin if Exam==99 & inrange(normrunning, -75, 75), xlabel(-75(25)75)) ///
	(lfit top100admit normrunning if Exam==99 & normrunning>=-75 & normrunning<0, clcolor(black)) ///
	(lfit top100admit normrunning if Exam==99 & inrange(normrunning, 0, 75), clcolor(black)), ///
	xline(0) ///
	xtitle("Normalized Score in Classroom Placement Exam") ///
	ytitle("Enroll in a Top-100 University") ///
	legend(off) graphregion(fcolor(white) lcolor(white))

graph export Figure8c.png, replace
	  
	  
	  		
	     *E) Likelihood of attending Top 40 University
		* Fix errors in the 985 flag: a student recorded as top-40 (if_985==1)
		* but not top-100 (if_211==0) should not be possible, so if_985 is reset to 0.
		replace if_985 = 0 if if_211==0 & if_985==1

		* Top-40 indicator: 1 when if_985==1, 0 otherwise (missing if_985 counts as 0).
		gen top40admit = (if_985 == 1)

		* Bin means, computed on the Exam==99 rows only.
		bysort bin: egen meantop40admit = mean(top40admit) if Exam==99

		* ******** FIGURE 3E ********
		twoway ///
			(scatter meantop40admit bin if Exam==99 & inrange(normrunning, -75, 75), xlabel(-75(25)75)) ///
			(lfit top40admit normrunning if Exam==99 & normrunning>=-75 & normrunning<0, clcolor(black)) ///
			(lfit top40admit normrunning if Exam==99 & inrange(normrunning, 0, 75), clcolor(black)), ///
			xline(0) ///
			xtitle("Normalized Score in Classroom Placement Exam") ///
			ytitle("Enroll in a Top-40 University") ///
			legend(off) graphregion(fcolor(white) lcolor(white))

		graph export Figure8d.png, replace
	
	

			*****************************************************************************************************************************************
			****************************************************Figure 4----MECHANISMS***************************************************************
			*****************************************************************************************************************************************
	
	* 1) Peer quality by class, based on the high school entrance exam (Exam==0).
	* Important: the score is standardized within cohort year so that it is
	* comparable across years.
	* stgaok11, stgaok12, stgaok13 hold the 2015, 2016, 2017 z-scores of Total_mark.
	local k = 0
	forvalues y = 2015/2017 {
		local ++k
		egen stgaok1`k' = std(Total_mark) if Exam==0 & year==`y'
	}

	* Stack the cohort-specific z-scores into a single variable.
	gen stanHS = cond(year==2016, stgaok12, cond(year==2017, stgaok13, stgaok11))

	* Peer score: mean standardized entrance score within each Class and year.
	bysort Class year: egen peerscore = mean(stanHS) if Exam==0

	* Bin means, computed on the Exam==0 rows only.
	bysort bin: egen meanpeerscore = mean(peerscore) if Exam==0

	* ******** FIGURE 4A ********
	twoway ///
		(scatter meanpeerscore bin if Exam==0 & inrange(normrunning, -75, 75), xlabel(-75(25)75)) ///
		(lfit peerscore normrunning if Exam==0 & normrunning>=-75 & normrunning<0, clcolor(black)) ///
		(lfit peerscore normrunning if Exam==0 & inrange(normrunning, 0, 75), clcolor(black)), ///
		xline(0) ///
		xtitle("Normalized Score in Classroom Placement Exam") ///
		ytitle("Classroom Peer Quality") ///
		legend(off) graphregion(fcolor(white) lcolor(white))

	graph export Peerquality.png, replace

  
  
  	*2) Generate Class size Discontinuity to see if any*
	* Work on a temporary copy restricted to the Exam==0 rows; the full data
	* come back at -restore-.
	preserve
	keep if Exam==0

	* Class size: number of Exam==0 rows in each Class and year. Rows whose
	* Class is the string "." get no class size.
	bysort Class year: gen classsize = _N if Class!="." & Exam==0

	* Bin means of class size.
	bysort bin: egen meanclass = mean(classsize) if Exam==0

	* ******** FIGURE 4B ********
	twoway ///
		(scatter meanclass bin if Exam==0 & inrange(normrunning, -75, 75), xlabel(-75(25)75)) ///
		(lfit classsize normrunning if Exam==0 & normrunning>=-75 & normrunning<0, clcolor(black)) ///
		(lfit classsize normrunning if Exam==0 & inrange(normrunning, 0, 75), clcolor(black)), ///
		xline(0) ///
		xtitle("Normalized Score in Classroom Placement Exam") ///
		ytitle("Classroom Size") ///
		legend(off) graphregion(fcolor(white) lcolor(white))

	graph export Classsize.png, replace

	restore
	
	